In [18]:
# Part 1
# install necessary packages
!pip install folium
import folium
import requests
import pandas
from folium.plugins import HeatMap
Requirement already satisfied: folium in /opt/conda/lib/python3.8/site-packages (0.12.1)
Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from folium) (2.25.1)
Requirement already satisfied: branca>=0.3.0 in /opt/conda/lib/python3.8/site-packages (from folium) (0.4.2)
Requirement already satisfied: jinja2>=2.9 in /opt/conda/lib/python3.8/site-packages (from folium) (2.11.2)
Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from folium) (1.19.5)
Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.8/site-packages (from jinja2>=2.9->folium) (1.1.1)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (1.26.2)
Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (4.0.0)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2020.12.5)
Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2.10)
In [19]:
# Download the arrest records and derive numeric "lat"/"long" columns from the
# textual "Location 1" column, whose values look like "(39.3284868683, -76.5592061768)".
arrest_table = pandas.read_csv("https://cmsc320.github.io/files/BPD_Arrests.csv")

# Rows without a location cannot be placed on a map, so drop them up front.
arrest_table = arrest_table.dropna(subset=["Location 1"])

# Split once on the comma into two columns (0 = latitude text, 1 = longitude text).
# expand=True replaces the deprecated unpacking of the `.str` accessor, which
# emitted a FutureWarning.
coords = arrest_table["Location 1"].str.split(",", n=1, expand=True)

# Strip the surrounding parentheses and spaces literally (no regex involved),
# then convert the remaining text to floats.
arrest_table["lat"] = coords[0].str.strip(" ()").astype(float)
arrest_table["long"] = coords[1].str.strip(" ()").astype(float)
<ipython-input-19-e9771d7ab198>:5: FutureWarning: Columnar iteration over characters will be deprecated in future releases.
  arrest_table["lat"], arrest_table["long"] = arrest_table["Location 1"].str.split(",").str
Out[19]:
arrest age race sex arrestDate arrestTime arrestLocation incidentOffense incidentLocation charge chargeDescription district post neighborhood Location 1 lat long
91923 12556633.0 26 B M 09/20/2012 23:45:00 4500 Mannasota Ave 87O-Narcotics (Outside) 4500 Mannasota Av NaN Unknown Charge NORTHEASTERN 436.0 Frankford (39.3284868683, -76.5592061768) 39.328487 -76.559206
40348 11336848.0 29 B M 10/03/2011 04:30:00 2300 Garrison Blvd 87-Narcotics 2300 Garrison Bd 1 0573 Cds: Possession-Marihuana || Cds NORTHWESTERN 621.0 Garwyn Oaks (39.3143607362, -76.6766876816) 39.314361 -76.676688
65984 12450662.0 33 B M 03/30/2012 11:40:00 3400 Oldyork Rd 4E-Common Assault 3400 Old York Rd 1 1415 Asslt-Sec Degree || 2Nd Degree Assault NORTHERN 525.0 Waverly (39.3299390984, -76.6083116715) 39.329939 -76.608312
97555 12581158.0 27 W F 11/01/2012 15:30:00 1200 Steelton Ave 97-Search & Seizure 1200 Steelton Av 5 3550 Cds:Poss Para || Cds Violation SOUTHEASTERN 233.0 Graceland Park (39.2797310267, -76.5362566117) 39.279731 -76.536257
13552 11190802.0 31 B M 04/10/2011 03:00:00 1000 Bonaparte Ave 4E-Common Assault 1000 Bonaparte Av 1 1415 Asslt-Sec Degree || Assault(Domestic) EASTERN 312.0 East Baltimore Midway (39.3161191816, -76.6012690652) 39.316119 -76.601269
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
74082 12484973.0 40 B M 05/24/2012 21:30:00 500 Bloom St 87-Narcotics 500 Bloom St 4 3550 Cds:Possess-Not Marihuana || Cds CENTRAL 135.0 Druid Heights (39.3081857325, -76.6365285678) 39.308186 -76.636529
88587 12542777.0 60 B F 08/30/2012 09:45:00 2800 Westwood Ave 87-Narcotics 2900 Westwood Av 4 3550 Cds:Possess-Not Marihuana || Cds SOUTHWESTERN 811.0 Northwest Community Action (39.3082003001, -76.6648678896) 39.308200 -76.664868
86871 12535437.0 47 U M 08/19/2012 01:40:00 0 S Highland Ave Unknown Offense NaN NaN Unknown Charge SOUTHEASTERN 224.0 Baltimore Highlands (39.2921348214, -76.5693522951) 39.292135 -76.569352
59840 12426328.0 25 B M 02/20/2012 00:20:00 2200 Westwood St Unknown Offense NaN NaN Unknown Charge WESTERN 735.0 Coppin Heights/Ash-Co-East (39.3086294290, -76.6532037825) 39.308629 -76.653204
29985 11278883.0 18 W M 07/24/2011 22:00:00 2200 North Ave 87-Narcotics 1900 E North Av 1 0573 Cds: Possession-Marihuana || Poss Of Cocaine EASTERN 331.0 South Clifton Park (39.3124979718, -76.5873908243) 39.312498 -76.587391

500 rows × 17 columns

In [47]:
# Draw a random sample from arrest_table amounting to 1.5% of its rows.
# Rows are drawn with replacement (replace=True), so the same arrest can be
# picked more than once; random_state pins the draw so re-runs give the same rows.
sample = arrest_table.sample(
    frac=0.015,
    replace=True,
    random_state=1,
)
sample
Out[47]:
arrest age race sex arrestDate arrestTime arrestLocation incidentOffense incidentLocation charge chargeDescription district post neighborhood Location 1 lat long
102477 12601794.0 27 B M 12/11/2012 22:30:00 2800 Huntingdon Ave 87-Narcotics 2800 Huntingdon Av NaN Unknown Charge NORTHERN 511.0 Remington (39.3207605205, -76.6228419114) 39.320761 -76.622842
55844 12408657.0 25 B M 01/23/2012 11:10:00 2900 Arunah St 87O-Narcotics (Outside) N Longwood St & Arunah Av 4 3550 Cds:Possess-Not Marihuana || Dist Cds SOUTHWESTERN 815.0 Franklintown Road (39.2952463082, -76.6652749899) 39.295246 -76.665275
21444 11229420.0 20 B M 05/30/2011 18:15:00 5800 Edgepark Rd 4E-Common Assault 5800 Edgepark Rd 1 1415 Asslt-Sec Degree || Aggravated Assault NORTHEASTERN 423.0 Loch Raven (39.3585884417, -76.5757069675) 39.358588 -76.575707
9337 11169784.0 19 B F 03/12/2011 00:30:00 3600 Hayward Ave 24-Towed Vehicle 3600 Hayward Av 1 0573 Cds: Possession-Marihuana || Cds Violation NORTHWESTERN 634.0 Arlington (39.3481164130, -76.6816907390) 39.348116 -76.681691
55050 12404820.0 48 B M 01/17/2012 14:30:00 900 Bennette Pl 87-Narcotics 900 Bennett Pl 5 3550 Cds:Poss Para || Trespassing WESTERN 714.0 Harlem Park (39.2949861350, -76.6326646183) 39.294986 -76.632665
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
81 11127199.0 33 W M 01/01/2011 19:50:00 1600 Locust St 4E-Common Assault 1600 Locust St 1 1415 Asslt-Sec Degree || Common Assault SOUTHERN 911.0 Curtis Bay (39.2268317014, -76.5877926584) 39.226832 -76.587793
87461 12537903.0 58 B F 08/22/2012 21:20:00 2000 Boone St 97-Search & Seizure 2000 Boone St 1 0621 NaN EASTERN 311.0 East Baltimore Midway (39.3131229414, -76.6083646725) 39.313123 -76.608365
67092 12454863.0 36 W M 04/06/2012 19:00:00 2900 Odonnell St 4E-Common Assault 2900 Odonnell St 1 1415 Asslt-Sec Degree || 2Nd Degree Assault SOUTHEASTERN 217.0 Canton (39.2803587450, -76.5753189334) 39.280359 -76.575319
71133 12472278.0 23 B F 05/04/2012 09:41:00 4100 Patterson St 4E-Common Assault 4100 Patterson Av 1 1415 Asslt-Sec Degree || Common Assault NORTHWESTERN 631.0 Reisterstown Station (39.3552966589, -76.7034792173) 39.355297 -76.703479
9109 11168875.0 34 B M 03/10/2011 14:20:00 0 N Bentalou St 4E-Common Assault 0 N Bentalou St 1 1425 Reckless Endangerment || Common Assault Domestic WESTERN 721.0 Penrose/Fayette Street Outreach (39.2887779886, -76.6528770239) 39.288778 -76.652877

958 rows × 17 columns

In [64]:
# Part 2 & 3
# Heat map of where the sampled arrests are concentrated across Baltimore,
# overlaid with one circle per arrest coloured by the arrestee's age bracket.
map_osm = folium.Map(location=[39.29, -76.61], zoom_start=11)

# [lat, long] pairs for the heat layer, taken straight from the two columns.
heat_data = sample[['lat', 'long']].values.tolist()
HeatMap(heat_data, radius=20).add_to(map_osm)

# (exclusive upper age bound, marker colour) in ascending order.
# Ages of 60 and above fall through every bound and are drawn in blue.
AGE_BRACKETS = [
    (20, 'purple'),
    (30, 'black'),
    (40, 'white'),
    (50, 'green'),
    (60, 'orange'),
]


def age_color(age):
    """Return the marker colour for the age bracket that `age` falls into.

    Brackets: <20 purple, 20-29 black, 30-39 white, 40-49 green,
    50-59 orange, 60+ blue. A missing age yields 'gray' so the marker
    still gets a valid, visibly distinct colour.
    """
    if pandas.isna(age):
        return 'gray'
    for upper_bound, bracket_color in AGE_BRACKETS:
        if age < upper_bound:
            return bracket_color
    return 'blue'


# create markers based on the age of my random sample
for index, row in sample.iterrows():
    color = age_color(row['age'])
    # f-string formatting renders a missing (NaN) field as text instead of
    # raising TypeError, which "str" + float concatenation would do.
    popup_text = (
        f"\nOffense: {row['incidentOffense']}"
        f"\nAge: {row['age']}"
        f"\nRace: {row['race']}"
        f"\nSex: {row['sex']}"
    )
    folium.Circle(
        radius=50,
        location=[row['lat'], row['long']],
        popup=popup_text,
        color=color,
        fill=True,
    ).add_to(map_osm)

# Last expression of the cell: renders the finished map.
map_osm
Out[64]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# This map is meant to show the relationship between age and crime in the Baltimore area. Initially, I thought
# that the map would show a heavy concentration of those under the age of 20. Yet, upon seeing this map, I saw that
# there wasn't a distinctive pattern, meaning there wasn't a single age group (marker colour) that dominated Baltimore
# crime. Although no single group accounted for a majority of the markers on the map, I saw that
# the age groups from 20 to 50 were responsible for a majority of the crime in Baltimore, seeing as
# their markers (black, white, and green) were the most visible. The markers for the other age groups
# (older than 50 and younger than 20) are more scattered.
In [49]:
# Heat map of where the sampled arrests are concentrated across Baltimore,
# overlaid with one circle per arrest coloured by the arrestee's sex.
map_sex = folium.Map(location=[39.29, -76.61], zoom_start=11)

# [lat, long] pairs for the heat layer, taken straight from the two columns.
heat_data = sample[['lat', 'long']].values.tolist()
HeatMap(heat_data, radius=20).add_to(map_sex)

# Marker colour for each value of the 'sex' column.
SEX_COLORS = {'F': 'white', 'M': 'black'}

# create markers based on the sex of those in my random sample
for index, row in sample.iterrows():
    # Look the colour up fresh for every row. A missing or unrecognised value
    # gets 'gray' instead of silently reusing whatever colour the previous row
    # (or an earlier cell) left behind in `color`.
    color = SEX_COLORS.get(row['sex'], 'gray')
    # f-string formatting renders a missing (NaN) field as text instead of
    # raising TypeError, which "str" + float concatenation would do.
    popup_text = (
        f"\nOffense: {row['incidentOffense']}"
        f"\nAge: {row['age']}"
        f"\nRace: {row['race']}"
        f"\nSex: {row['sex']}"
    )
    folium.Circle(
        radius=40,
        location=[row['lat'], row['long']],
        popup=popup_text,
        color=color,
        fill=True,
    ).add_to(map_sex)

# Last expression of the cell: renders the finished map.
map_sex
Out[49]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# From this map, I saw that men are responsible for a majority of the crime in Baltimore. Although women clearly
# partake in crime, as shown in this map, it is not as prevalent among women as it is among men. I am able to
# interpret this based on the heat map that shows where most crime takes place in Baltimore: black markers (men)
# become dense in the areas of high crime, unlike the white markers (women).
In [55]:
# Heat map of where the sampled arrests are concentrated across Baltimore,
# overlaid with one circle per arrest coloured by the arrestee's race code.
map_race = folium.Map(location=[39.29, -76.61], zoom_start=11)

# [lat, long] pairs for the heat layer, taken straight from the two columns.
heat_data = sample[['lat', 'long']].values.tolist()
HeatMap(heat_data, radius=20).add_to(map_race)

# Marker colour for each code in the 'race' column. Every code has its own
# colour: 'H' previously shared 'white' with 'W', which made the two groups
# impossible to tell apart on the map.
RACE_COLORS = {
    'B': 'black',
    'A': 'yellow',
    'H': 'orange',
    'I': 'green',
    'U': 'blue',
    'W': 'white',
}

# create markers based on race of those in my random sample
for index, row in sample.iterrows():
    # Look the colour up fresh for every row. A missing or unrecognised code
    # gets 'gray' instead of silently reusing whatever colour the previous row
    # (or an earlier cell) left behind in `color`.
    color = RACE_COLORS.get(row['race'], 'gray')
    # f-string formatting renders a missing (NaN) field as text instead of
    # raising TypeError, which "str" + float concatenation would do.
    popup_text = (
        f"\nOffense: {row['incidentOffense']}"
        f"\nAge: {row['age']}"
        f"\nRace: {row['race']}"
        f"\nSex: {row['sex']}"
    )
    folium.Circle(
        radius=40,
        location=[row['lat'], row['long']],
        popup=popup_text,
        color=color,
        fill=True,
    ).add_to(map_race)

# Last expression of the cell: renders the finished map.
map_race
Out[55]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# As in the previous maps, this map has a heat map that shows where there is a high level of crime versus a low
# level of crime. Using markers with unique colors, I grouped the crime based on race alone. In doing so,
# I was able to interpret the map and the relationship between race and crime. As shown in the map, I saw that
# a majority of crime is attributed to the black markers (African-Americans) and the white markers (white people).
# Still, the map shows that black markers become most dense around high levels of crime, thus proving the point that
# African-Americans are responsible for a majority of crime in Baltimore. Besides these two racial groups, I saw that
# other groups were not responsible for as much crime, as their markers are scattered around areas of crime.
In [ ]:
# Based on the three maps, I saw that a majority of crime came from black males of all ages (although a majority
# were older than 20 and younger than 50).